
<!doctype html>
<html lang="en" class="no-js">
  <head>
    
      <meta charset="utf-8">
      <meta name="viewport" content="width=device-width,initial-scale=1">
      
        <meta name="description" content="Documentation for TPOT, a Python Automated Machine Learning tool that optimizes machine learning pipelines using genetic programming.">
      
      
        <meta name="author" content="Randal S. Olson">
      
      
        <link rel="canonical" href="http://epistasislab.github.io/tpot/api/">
      
      
        <link rel="prev" href="../using/">
      
      
        <link rel="next" href="../examples/">
      
      <link rel="icon" href="../assets/images/favicon.png">
      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-9.1.4">
    
    
      
        <title>TPOT API - TPOT</title>
      
    
    
      <link rel="stylesheet" href="../assets/stylesheets/main.240905d7.min.css">
      
        
        <link rel="stylesheet" href="../assets/stylesheets/palette.a0c5b2b5.min.css">
      
      

    
    
    
      
        
        
        <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
        <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
        <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
      
    
    
    <!-- Theme storage helpers. __md_scope is ".." resolved as a URL against the current location; __md_hash folds the character codes of a string into an integer; __md_get and __md_set read and write JSON-encoded values (localStorage by default) under the key formed from the scope pathname, a ".", and the given name. __md_set silently ignores errors raised while storing. -->
    <script>__md_scope=new URL("..",location),__md_hash=e=>[...e].reduce((e,_)=>(e<<5)-e+_.charCodeAt(0),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
    
      

    
    
    
  </head>
  
  
    
    
      
    
    
    
    
    <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="" data-md-color-accent="">
  
    
    
      <!-- Restore the stored color palette: read "__palette" through __md_get and, when it has an object-valued "color" entry, copy each of its keys onto the body element as a data-md-color-KEY attribute. -->
      <script>var palette=__md_get("__palette");if(palette&&"object"==typeof palette.color)for(var key of Object.keys(palette.color))document.body.setAttribute("data-md-color-"+key,palette.color[key])</script>
    
    <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
    <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
    <label class="md-overlay" for="__drawer"></label>
    <div data-md-component="skip">
      
        
        <a href="#tpot-api" class="md-skip">
          Skip to content
        </a>
      
    </div>
    <div data-md-component="announce">
      
    </div>
    
    
      

  

<header class="md-header md-header--shadow" data-md-component="header">
  <nav class="md-header__inner md-grid" aria-label="Header">
    <a href=".." title="TPOT" class="md-header__button md-logo" aria-label="TPOT" data-md-component="logo">
      
  
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54Z"/></svg>

    </a>
    <label class="md-header__button md-icon" for="__drawer">
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
    </label>
    <div class="md-header__title" data-md-component="header-title">
      <div class="md-header__ellipsis">
        <div class="md-header__topic">
          <span class="md-ellipsis">
            TPOT
          </span>
        </div>
        <div class="md-header__topic" data-md-component="header-topic">
          <span class="md-ellipsis">
            
              TPOT API
            
          </span>
        </div>
      </div>
    </div>
    
      <form class="md-header__option" data-md-component="palette">
        
          
          <input class="md-option" data-md-color-media="" data-md-color-scheme="default" data-md-color-primary="" data-md-color-accent=""  aria-label="Switch to dark mode"  type="radio" name="__palette" id="__palette_1">
          
            <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_2" hidden>
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12 20 8.69Z"/></svg>
            </label>
          
        
          
          <input class="md-option" data-md-color-media="" data-md-color-scheme="slate" data-md-color-primary="" data-md-color-accent=""  aria-label="Switch to light mode"  type="radio" name="__palette" id="__palette_2">
          
            <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12c0-2.42-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12 20 8.69Z"/></svg>
            </label>
          
        
      </form>
    
    
    
      <label class="md-header__button md-icon" for="__search">
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
      </label>
      <div class="md-search" data-md-component="search" role="dialog">
  <label class="md-search__overlay" for="__search"></label>
  <div class="md-search__inner" role="search">
    <form class="md-search__form" name="search">
      <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
      <label class="md-search__icon md-icon" for="__search">
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
      </label>
      <nav class="md-search__options" aria-label="Search">
        
        <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
        </button>
      </nav>
      
    </form>
    <div class="md-search__output">
      <div class="md-search__scrollwrap" data-md-scrollfix>
        <div class="md-search-result" data-md-component="search-result">
          <div class="md-search-result__meta">
            Initializing search
          </div>
          <ol class="md-search-result__list" role="presentation"></ol>
        </div>
      </div>
    </div>
  </div>
</div>
    
    
      <div class="md-header__source">
        <a href="https://github.com/epistasislab/tpot" title="Go to repository" class="md-source" data-md-component="source">
  <div class="md-source__icon md-icon">
    
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.3.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81z"/></svg>
  </div>
  <div class="md-source__repository">
    GitHub
  </div>
</a>
      </div>
    
  </nav>
  
</header>
    
    <div class="md-container" data-md-component="container">
      
      
        
          
        
      
      <main class="md-main" data-md-component="main">
        <div class="md-main__inner md-grid">
          
            
              
              <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
                <div class="md-sidebar__scrollwrap">
                  <div class="md-sidebar__inner">
                    


  

<nav class="md-nav md-nav--primary md-nav--integrated" aria-label="Navigation" data-md-level="0">
  <label class="md-nav__title" for="__drawer">
    <a href=".." title="TPOT" class="md-nav__button md-logo" aria-label="TPOT" data-md-component="logo">
      
  
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54Z"/></svg>

    </a>
    TPOT
  </label>
  
    <div class="md-nav__source">
      <a href="https://github.com/epistasislab/tpot" title="Go to repository" class="md-source" data-md-component="source">
  <div class="md-source__icon md-icon">
    
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.3.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81z"/></svg>
  </div>
  <div class="md-source__repository">
    GitHub
  </div>
</a>
    </div>
  
  <ul class="md-nav__list" data-md-scrollfix>
    
      
      
      

  
  
  
    <li class="md-nav__item">
      <a href=".." class="md-nav__link">
        Home
      </a>
    </li>
  

    
      
      
      

  
  
  
    <li class="md-nav__item">
      <a href="../installing/" class="md-nav__link">
        Installation
      </a>
    </li>
  

    
      
      
      

  
  
  
    <li class="md-nav__item">
      <a href="../using/" class="md-nav__link">
        Using TPOT
      </a>
    </li>
  

    
      
      
      

  
  
    
  
  
    <li class="md-nav__item md-nav__item--active">
      
      <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
      
      
        
      
      
        <label class="md-nav__link md-nav__link--active" for="__toc">
          TPOT API
          <span class="md-nav__icon md-icon"></span>
        </label>
      
      <a href="./" class="md-nav__link md-nav__link--active">
        TPOT API
      </a>
      
        

<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
  
  
  
    
  
  
    <label class="md-nav__title" for="__toc">
      <span class="md-nav__icon md-icon"></span>
      Table of contents
    </label>
    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
      
        <li class="md-nav__item">
  <a href="#classification" class="md-nav__link">
    Classification
  </a>
  
</li>
      
        <li class="md-nav__item">
  <a href="#regression" class="md-nav__link">
    Regression
  </a>
  
</li>
      
    </ul>
  
</nav>
      
    </li>
  

    
      
      
      

  
  
  
    <li class="md-nav__item">
      <a href="../examples/" class="md-nav__link">
        Examples
      </a>
    </li>
  

    
      
      
      

  
  
  
    <li class="md-nav__item">
      <a href="../contributing/" class="md-nav__link">
        Contributing
      </a>
    </li>
  

    
      
      
      

  
  
  
    <li class="md-nav__item">
      <a href="../releases/" class="md-nav__link">
        Release Notes
      </a>
    </li>
  

    
      
      
      

  
  
  
    <li class="md-nav__item">
      <a href="../citing/" class="md-nav__link">
        Citing TPOT
      </a>
    </li>
  

    
      
      
      

  
  
  
    <li class="md-nav__item">
      <a href="../support/" class="md-nav__link">
        Support
      </a>
    </li>
  

    
      
      
      

  
  
  
    <li class="md-nav__item">
      <a href="../related/" class="md-nav__link">
        Related
      </a>
    </li>
  

    
  </ul>
</nav>
                  </div>
                </div>
              </div>
            
            
          
          
            <div class="md-content" data-md-component="content">
              <article class="md-content__inner md-typeset">
                
                  

  
  


<h1 id="tpot-api">TPOT API</h1>
<h2 id="classification">Classification</h2>
<pre><em>class</em> tpot.<strong style="color:#008AB8">TPOTClassifier</strong>(<em><strong>generations</strong>=100, <strong>population_size</strong>=100,
                          <strong>offspring_size</strong>=None, <strong>mutation_rate</strong>=0.9,
                          <strong>crossover_rate</strong>=0.1,
                          <strong>scoring</strong>='accuracy', <strong>cv</strong>=5,
                          <strong>subsample</strong>=1.0, <strong>n_jobs</strong>=1,
                          <strong>max_time_mins</strong>=None, <strong>max_eval_time_mins</strong>=5,
                          <strong>random_state</strong>=None, <strong>config_dict</strong>=None,
                          <strong>template</strong>=None,
                          <strong>warm_start</strong>=False,
                          <strong>memory</strong>=None,
                          <strong>use_dask</strong>=False,
                          <strong>periodic_checkpoint_folder</strong>=None,
                          <strong>early_stop</strong>=None,
                          <strong>verbosity</strong>=0,
                          <strong>disable_update_check</strong>=False,
                          <strong>log_file</strong>=None
                          </em>)</pre>
<div align="right"><a href="https://github.com/EpistasisLab/tpot/blob/master/tpot/base.py">source</a></div>

<p>Automated machine learning for supervised classification tasks.</p>
<p>The TPOTClassifier performs an intelligent search over machine learning pipelines that can contain supervised classification models,
preprocessors, feature selection techniques, and any other estimator or transformer that follows the <a href="http://scikit-learn.org/stable/developers/contributing.html#apis-of-scikit-learn-objects">scikit-learn API</a>.
The TPOTClassifier will also search over the hyperparameters of all objects in the pipeline.</p>
<p>By default, TPOTClassifier will search over a broad range of supervised classification algorithms, transformers, and their parameters.
However, the algorithms, transformers, and hyperparameters that the TPOTClassifier searches over can be fully customized using the <code>config_dict</code> parameter.</p>
<p>Read more in the <a href="../using/#tpot-with-code">User Guide</a>.</p>
<table>
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Parameters:</strong></td>
<td width="80%" style="background:white;">
<strong>generations</strong>: int or None, optional (default=100)
<blockquote>
Number of iterations to run the pipeline optimization process. It must be a positive number or None. If None, the parameter <em>max_time_mins</em> must be defined as the runtime limit.
<br /><br />
Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline.
<br /><br />
TPOT will evaluate <em>population_size</em> + <em>generations</em> × <em>offspring_size</em> pipelines in total.
</blockquote>

<strong>population_size</strong>: int, optional (default=100)
<blockquote>
Number of individuals to retain in the genetic programming population every generation. Must be a positive number.
<br /><br />
Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline.
</blockquote>

<strong>offspring_size</strong>: int, optional (default=None)
<blockquote>
Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size.
</blockquote>

<strong>mutation_rate</strong>: float, optional (default=0.9)
<blockquote>
Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation.
<br /><br />
<em>mutation_rate</em> + <em>crossover_rate</em> cannot exceed 1.0.
<br /><br />
We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms.
</blockquote>

<strong>crossover_rate</strong>: float, optional (default=0.1)
<blockquote>
Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to "breed" every generation.
<br /><br />
<em>mutation_rate</em> + <em>crossover_rate</em> cannot exceed 1.0.
<br /><br />
We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms.
</blockquote>

<strong>scoring</strong>: string or callable, optional (default='accuracy')
<blockquote>
Function used to evaluate the quality of a given pipeline for the classification problem. The following built-in scoring functions can be used:
<br /><br/>
'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss', 'precision' etc. (suffixes apply as with 'f1'), 'recall' etc. (suffixes apply as with 'f1'), 'jaccard' etc. (suffixes apply as with 'f1'), 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted'
<br /><br/>
If you would like to use a custom scorer, you can pass the callable object/function with signature <em>scorer(estimator, X, y)</em>.
<br /><br/>
See the section on <a href="../using/#scoring-functions">scoring functions</a> for more details.

</blockquote>

<strong>cv</strong>: int, cross-validation generator, or an iterable, optional (default=5)
<blockquote>
Cross-validation strategy used when evaluating pipelines.
<br /><br />
Possible inputs:
<ul>
<li>integer, to specify the number of folds in an unshuffled StratifiedKFold,</li>
<li>An object to be used as a cross-validation generator, or</li>
<li>An iterable yielding train/test splits.</li>
</ul>
</blockquote>

<strong>subsample</strong>: float, optional (default=1.0)
<blockquote>
Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0].
<br /><br />
Setting <em>subsample</em>=0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process.
</blockquote>

<strong>n_jobs</strong>: integer, optional (default=1)
<blockquote>
Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process.
<br /><br />
Setting <em>n_jobs</em>=-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets.
</blockquote>

<strong>max_time_mins</strong>: integer or None, optional (default=None)
<blockquote>
How many minutes TPOT has to optimize the pipeline.
<br /><br />
If not None, this setting will allow TPOT to run until <em>max_time_mins</em> minutes have elapsed and then stop. TPOT will stop earlier if <em>generations</em> is set and all generations are already evaluated.
</blockquote>

<strong>max_eval_time_mins</strong>: float, optional (default=5)
<blockquote>
How many minutes TPOT has to evaluate a single pipeline.
<br /><br />
Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines.
</blockquote>

<strong>random_state</strong>: integer or None, optional (default=None)
<blockquote>
The seed of the pseudo random number generator used in TPOT.
<br /><br />
Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed.
</blockquote>

<strong>config_dict</strong>: Python dictionary, string, or None, optional (default=None)
<blockquote>
A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process.
<br /><br />
Possible inputs are:
<ul>
<li>Python dictionary, TPOT will use your custom configuration,</li>
<li>string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or</li>
<li>string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or</li>
<li>string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or</li>
<li>None, TPOT will use the default TPOTClassifier configuration.</li>
</ul>
See the <a href="../using/#built-in-tpot-configurations">built-in configurations</a> section for the list of configurations included with TPOT, and the <a href="../using/#customizing-tpots-operators-and-parameters">custom configuration</a> section for more information and examples of how to create your own TPOT configurations.
</blockquote>

<strong>template</strong>: string (default=None)
<blockquote>
Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT.
<br /><br />

So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier) or a specific operator (e.g. <code>SelectPercentile</code>) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of <a href="https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17"><code>SelectorMixin</code></a>, <a href="https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html"><code>TransformerMixin</code></a>, <a href="https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html"><code>ClassifierMixin</code></a> in scikit-learn) to that step. Steps in the template are delimited by "-", e.g. "SelectPercentile-Transformer-Classifier". By default, the value of template is None and TPOT generates a tree-based pipeline randomly.
<br /><br />

See the <a href="../using/#template-option-in-tpot">template option in TPOT</a> section for more details.
</blockquote>

<strong>warm_start</strong>: boolean, optional (default=False)
<blockquote>
Flag indicating whether the TPOT instance will reuse the population from previous calls to <em>fit()</em>.
<br /><br />
Setting <em>warm_start</em>=True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off.
</blockquote>

<strong>memory</strong>: a joblib.Memory object or string, optional (default=None)
<blockquote>
If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. More details about memory caching in <a href="http://scikit-learn.org/stable/modules/pipeline.html#caching-transformers-avoid-repeated-computation">scikit-learn documentation</a>
<br /><br />
Possible inputs are:
<ul>
<li>String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or</li>
<li>Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or</li>
<li>Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or</li>
<li>None, TPOT does not use memory caching.</li>
</ul>
</blockquote>

<strong>use_dask</strong>: boolean, optional (default: False)
<blockquote>
Whether to use Dask-ML's pipeline optimizations. This avoids re-fitting
the same estimator on the same split of data multiple times. It
will also provide more detailed diagnostics when using Dask's
distributed scheduler.
<br /><br />
See <a href="https://dask-ml.readthedocs.io/en/latest/hyper-parameter-search.html#avoid-repeated-work">avoid repeated work</a> for more details.
</blockquote>

<strong>periodic_checkpoint_folder</strong>: path string, optional (default: None)
<blockquote>
If supplied, a folder in which TPOT will periodically save the pipelines on the Pareto front so far while optimizing.<br /><br />
Currently once per generation but not more often than once per 30 seconds.<br /><br />
Useful in multiple cases:
<ul>
<li>Sudden death before TPOT could save optimized pipeline</li>
<li>Track its progress</li>
<li>Grab pipelines while it's still optimizing</li>
</ul>
</blockquote>

<strong>early_stop</strong>: integer, optional (default: None)
<blockquote>
How many consecutive generations TPOT will allow without any improvement in the optimization process.
<br /><br />
Ends the optimization process if there is no improvement in the given number of generations.
</blockquote>

<strong>verbosity</strong>: integer, optional (default=0)
<blockquote>
How much information TPOT communicates while it's running.
<br /><br />
Possible inputs are:
<ul>
<li>0, TPOT will print nothing,</li>
<li>1, TPOT will print minimal information,</li>
<li>2, TPOT will print more information and provide a progress bar, or</li>
<li>3, TPOT will print everything and provide a progress bar.</li>
</ul>
</blockquote>

<strong>disable_update_check</strong>: boolean, optional (default=False)
<blockquote>
Flag indicating whether the TPOT version checker should be disabled.
<br /><br />
The update checker will tell you when a new version of TPOT has been released.
</blockquote>

<strong>log_file</strong>: file-like class (io.TextIOWrapper or io.StringIO) or string, optional (default: None)
<br /><br />
<blockquote>
Save progress content to a file.
If it is a string for the path and file name of the desired output file,
TPOT will create the file and write the log into it.
If it is None, TPOT will output the log to sys.stdout.
</blockquote>

</td>
</tr>

<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Attributes:</strong></td>
<td width="80%" style="background:white;">
<strong>fitted_pipeline_</strong>: scikit-learn Pipeline object
<blockquote>
The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset.
</blockquote>

<strong>pareto_front_fitted_pipelines_</strong>: Python dictionary
<blockquote>
Dictionary containing all the pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset.
<br /><br />
The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline.
<br /><br />
Note: <em>pareto_front_fitted_pipelines_</em> is only available when <em>verbosity</em>=3.
</blockquote>

<strong>evaluated_individuals_</strong>: Python dictionary
<blockquote>
Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline).
<br /><br />
This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated.
</blockquote>
</td>
</tr>
</table>

<p><strong>Example</strong></p>
<div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="kn">from</span> <span class="nn">tpot</span> <span class="kn">import</span> <span class="n">TPOTClassifier</span>
<a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="kn">from</span> <span class="nn">sklearn.datasets</span> <span class="kn">import</span> <span class="n">load_digits</span>
<a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">train_test_split</span>
<a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a>
<a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="n">digits</span> <span class="o">=</span> <span class="n">load_digits</span><span class="p">()</span>
<a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a><span class="n">X_train</span><span class="p">,</span> <span class="n">X_test</span><span class="p">,</span> <span class="n">y_train</span><span class="p">,</span> <span class="n">y_test</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span><span class="n">digits</span><span class="o">.</span><span class="n">data</span><span class="p">,</span> <span class="n">digits</span><span class="o">.</span><span class="n">target</span><span class="p">,</span>
<a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a>                                                    <span class="n">train_size</span><span class="o">=</span><span class="mf">0.75</span><span class="p">,</span> <span class="n">test_size</span><span class="o">=</span><span class="mf">0.25</span><span class="p">)</span>
<a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a>
<a id="__codelineno-0-9" name="__codelineno-0-9" href="#__codelineno-0-9"></a><span class="n">tpot</span> <span class="o">=</span> <span class="n">TPOTClassifier</span><span class="p">(</span><span class="n">generations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">population_size</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span> <span class="n">verbosity</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<a id="__codelineno-0-10" name="__codelineno-0-10" href="#__codelineno-0-10"></a><span class="n">tpot</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X_train</span><span class="p">,</span> <span class="n">y_train</span><span class="p">)</span>
<a id="__codelineno-0-11" name="__codelineno-0-11" href="#__codelineno-0-11"></a><span class="nb">print</span><span class="p">(</span><span class="n">tpot</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">X_test</span><span class="p">,</span> <span class="n">y_test</span><span class="p">))</span>
<a id="__codelineno-0-12" name="__codelineno-0-12" href="#__codelineno-0-12"></a><span class="n">tpot</span><span class="o">.</span><span class="n">export</span><span class="p">(</span><span class="s1">&#39;tpot_digits_pipeline.py&#39;</span><span class="p">)</span>
</code></pre></div>
<p><strong>Functions</strong></p>
<table width="100%">
<tr>
<td width="25%"><a href="#tpotclassifier-fit">fit</a>(features, classes[, sample_weight, groups])</td>
<td>Run the TPOT optimization process on the given training data.</td>
</tr>

<tr>
<td><a href="#tpotclassifier-predict">predict</a>(features)</td>
<td>Use the optimized pipeline to predict the classes for a feature set.</td>
</tr>

<tr>
<td><a href="#tpotclassifier-predict-proba">predict_proba</a>(features)</td>
<td>Use the optimized pipeline to estimate the class probabilities for a feature set.</td>
</tr>

<tr>
<td><a href="#tpotclassifier-score">score</a>(testing_features, testing_classes)</td>
<td>Returns the optimized pipeline's score on the given testing data using the user-specified scoring function.</td>
</tr>

<tr>
<td><a href="#tpotclassifier-export">export</a>(output_file_name)</td>
<td>Export the optimized pipeline as Python code.</td>
</tr>
</table>

<p><a name="tpotclassifier-fit"></a>
<div class="highlight"><pre><span></span><code><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="n">fit</span><span class="p">(</span><span class="n">features</span><span class="p">,</span> <span class="n">classes</span><span class="p">,</span> <span class="n">sample_weight</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">groups</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
</code></pre></div></p>
<div style="padding-left:5%" width="100%">
Run the TPOT optimization process on the given training data.
<br /><br />
Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. This pipeline optimization procedure uses internal k-fold cross-validation to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples.
<br /><br />
<table width="100%">
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Parameters:</strong></td>
<td width="80%" style="background:white;">
<strong>features</strong>: array-like {n_samples, n_features}
<blockquote>
Feature matrix
<br /><br />
TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values.
As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed)
using <a href="http://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html">median value imputation</a>.
<br /><br />
If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT.
</blockquote>

<strong>classes</strong>: array-like {n_samples}
<blockquote>
List of class labels for prediction
</blockquote>

<strong>sample_weight</strong>: array-like {n_samples}, optional
<blockquote>
Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect tpot's scoring functions, which determine preferences between pipelines.
</blockquote>

<strong>groups</strong>: array-like, with shape {n_samples, }, optional
<blockquote>
Group labels for the samples used when performing cross-validation.
<br /><br />
This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as <a href="http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GroupKFold.html">sklearn.model_selection.GroupKFold</a>.
</blockquote>
</td>
</tr>
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Returns:</strong></td>
<td width="80%" style="background:white;">
<strong>self</strong>: object
<blockquote>
Returns a copy of the fitted TPOT object
</blockquote>
</td>
</tr>
</table>
</div>

<p><a name="tpotclassifier-predict"></a>
<div class="highlight"><pre><span></span><code><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="n">predict</span><span class="p">(</span><span class="n">features</span><span class="p">)</span>
</code></pre></div></p>
<div style="padding-left:5%" width="100%">
Use the optimized pipeline to predict the classes for a feature set.
<br /><br />
<table width="100%">
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Parameters:</strong></td>
<td width="80%" style="background:white;">
<strong>features</strong>: array-like {n_samples, n_features}
<blockquote>
Feature matrix
</blockquote>
</td>
</tr>
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Returns:</strong></td>
<td width="80%" style="background:white;">
<strong>predictions</strong>: array-like {n_samples}
<blockquote>
Predicted classes for the samples in the feature matrix
</blockquote>
</td>
</tr>
</table>
</div>

<p><a name="tpotclassifier-predict-proba"></a>
<div class="highlight"><pre><span></span><code><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="n">predict_proba</span><span class="p">(</span><span class="n">features</span><span class="p">)</span>
</code></pre></div></p>
<div style="padding-left:5%" width="100%">
Use the optimized pipeline to estimate the class probabilities for a feature set.
<br /><br />
Note: This function will only work for pipelines whose final classifier supports the <em>predict_proba</em> function. TPOT will raise an error otherwise.
<br /><br />
<table width="100%">
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Parameters:</strong></td>
<td width="80%" style="background:white;">
<strong>features</strong>: array-like {n_samples, n_features}
<blockquote>
Feature matrix
</blockquote>
</td>
</tr>
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Returns:</strong></td>
<td width="80%" style="background:white;">
<strong>predictions</strong>: array-like {n_samples, n_classes}
<blockquote>
The class probabilities of the input samples
</blockquote>
</td>
</tr>
</table>
</div>

<p><a name="tpotclassifier-score"></a>
<div class="highlight"><pre><span></span><code><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="n">score</span><span class="p">(</span><span class="n">testing_features</span><span class="p">,</span> <span class="n">testing_classes</span><span class="p">)</span>
</code></pre></div></p>
<div style="padding-left:5%" width="100%">
Returns the optimized pipeline's score on the given testing data using the user-specified scoring function.
<br /><br />
The default scoring function for TPOTClassifier is 'accuracy'.
<br /><br />
<table width="100%">
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Parameters:</strong></td>
<td width="80%" style="background:white;">
<strong>testing_features</strong>: array-like {n_samples, n_features}
<blockquote>
Feature matrix of the testing set
</blockquote>

<strong>testing_classes</strong>: array-like {n_samples}
<blockquote>
List of class labels for prediction in the testing set
</blockquote>
</td>
</tr>
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Returns:</strong></td>
<td width="80%" style="background:white;">
<strong>accuracy_score</strong>: float
<blockquote>
The estimated test set accuracy according to the user-specified scoring function.
</blockquote>
</td>
</tr>
</table>
</div>

<p><a name="tpotclassifier-export"></a>
<div class="highlight"><pre><span></span><code><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="n">export</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">,</span> <span class="n">data_file_path</span><span class="p">)</span>
</code></pre></div></p>
<div style="padding-left:5%" width="100%">
Export the optimized pipeline as Python code.
<br /><br />
See the <a href="../using/#tpot-with-code">usage documentation</a> for example usage of the export function.
<br /><br />
<table width="100%">
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Parameters:</strong></td>
<td width="80%" style="background:white;">
<strong>output_file_name</strong>: string
<blockquote>
String containing the path and file name of the desired output file
</blockquote>
<strong>data_file_path</strong>: string
<blockquote>
By default, the path of the input dataset is 'PATH/TO/DATA/FILE'. If data_file_path is another string, the path will be replaced.
</blockquote>
</td>
</tr>
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Returns:</strong></td>
<td width="80%" style="background:white;">
<strong>exported_code_string</strong>: string
<blockquote>
The whole pipeline text as a string should be returned if output_file_name is not specified.
</blockquote>
</td>
</tr>
</table>
</div>

<h2 id="regression">Regression</h2>
<pre><em>class</em> tpot.<strong style="color:#008AB8">TPOTRegressor</strong>(<em><strong>generations</strong>=100, <strong>population_size</strong>=100,
                         <strong>offspring_size</strong>=None, <strong>mutation_rate</strong>=0.9,
                         <strong>crossover_rate</strong>=0.1,
                         <strong>scoring</strong>='neg_mean_squared_error', <strong>cv</strong>=5,
                         <strong>subsample</strong>=1.0, <strong>n_jobs</strong>=1,
                         <strong>max_time_mins</strong>=None, <strong>max_eval_time_mins</strong>=5,
                         <strong>random_state</strong>=None, <strong>config_dict</strong>=None,
                         <strong>template</strong>=None,
                         <strong>warm_start</strong>=False,
                         <strong>memory</strong>=None,
                         <strong>use_dask</strong>=False,
                         <strong>periodic_checkpoint_folder</strong>=None,
                         <strong>early_stop</strong>=None,
                         <strong>verbosity</strong>=0,
                         <strong>disable_update_check</strong>=False</em>)</pre>
<div align="right"><a href="https://github.com/EpistasisLab/tpot/blob/master/tpot/base.py">source</a></div>

<p>Automated machine learning for supervised regression tasks.</p>
<p>The TPOTRegressor performs an intelligent search over machine learning pipelines that can contain supervised regression models,
preprocessors, feature selection techniques, and any other estimator or transformer that follows the <a href="http://scikit-learn.org/stable/developers/contributing.html#apis-of-scikit-learn-objects">scikit-learn API</a>.
The TPOTRegressor will also search over the hyperparameters of all objects in the pipeline.</p>
<p>By default, TPOTRegressor will search over a broad range of supervised regression models, transformers, and their hyperparameters.
However, the models, transformers, and parameters that the TPOTRegressor searches over can be fully customized using the <code>config_dict</code> parameter.</p>
<p>Read more in the <a href="../using/#tpot-with-code">User Guide</a>.</p>
<table>
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Parameters:</strong></td>
<td width="80%" style="background:white;">
<strong>generations</strong>: int or None, optional (default=100)
<blockquote>
Number of iterations to run the pipeline optimization process. It must be a positive number or None. If None, the parameter <em>max_time_mins</em> must be defined as the runtime limit.
<br /><br />
Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline.
<br /><br />
TPOT will evaluate <em>population_size</em> + <em>generations</em> × <em>offspring_size</em> pipelines in total.
</blockquote>

<strong>population_size</strong>: int, optional (default=100)
<blockquote>
Number of individuals to retain in the genetic programming population every generation. Must be a positive number.
<br /><br />
Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline.
</blockquote>

<strong>offspring_size</strong>: int, optional (default=None)
<blockquote>
Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size.
</blockquote>

<strong>mutation_rate</strong>: float, optional (default=0.9)
<blockquote>
Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation.
<br /><br />
<em>mutation_rate</em> + <em>crossover_rate</em> cannot exceed 1.0.
<br /><br />
We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms.
</blockquote>

<strong>crossover_rate</strong>: float, optional (default=0.1)
<blockquote>
Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to "breed" every generation.
<br /><br />
<em>mutation_rate</em> + <em>crossover_rate</em> cannot exceed 1.0.
<br /><br />
We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms.
</blockquote>

<strong>scoring</strong>: string or callable, optional (default='neg_mean_squared_error')
<blockquote>
Function used to evaluate the quality of a given pipeline for the regression problem. The following built-in scoring functions can be used:
<br /><br/>
'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'r2'
<br /><br/>
Note that we recommend using the <em>neg</em> version of mean squared error and related metrics so TPOT will minimize (instead of maximize) the metric.
<br /><br/>
If you would like to use a custom scorer, you can pass the callable object/function with signature <em>scorer(estimator, X, y)</em>.
<br /><br/>
See the section on <a href="../using/#scoring-functions">scoring functions</a> for more details.
</blockquote>

<strong>cv</strong>: int, cross-validation generator, or an iterable, optional (default=5)
<blockquote>
Cross-validation strategy used when evaluating pipelines.
<br /><br />
Possible inputs:
<ul>
<li>integer, to specify the number of folds in an unshuffled KFold,</li>
<li>An object to be used as a cross-validation generator, or</li>
<li>An iterable yielding train/test splits.</li>
</ul>
</blockquote>

<strong>subsample</strong>: float, optional (default=1.0)
<blockquote>
Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0].
<br /><br />
Setting <em>subsample</em>=0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process.
</blockquote>

<strong>n_jobs</strong>: integer, optional (default=1)
<blockquote>
Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process.
<br /><br />
Setting <em>n_jobs</em>=-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets.
</blockquote>

<strong>max_time_mins</strong>: integer or None, optional (default=None)
<blockquote>
How many minutes TPOT has to optimize the pipeline.
<br /><br />
If not None, this setting will allow TPOT to run until <em>max_time_mins</em> minutes have elapsed and then stop. TPOT will stop earlier if <em>generations</em> is set and all generations are already evaluated.
</blockquote>

<strong>max_eval_time_mins</strong>: float, optional (default=5)
<blockquote>
How many minutes TPOT has to evaluate a single pipeline.
<br /><br />
Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines.
</blockquote>

<strong>random_state</strong>: integer or None, optional (default=None)
<blockquote>
The seed of the pseudo random number generator used in TPOT.
<br /><br />
Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed.
</blockquote>

<strong>config_dict</strong>: Python dictionary, string, or None, optional (default=None)
<blockquote>
A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process.
<br /><br />
Possible inputs are:
<ul>
<li>Python dictionary, TPOT will use your custom configuration,</li>
<li>string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or</li>
<li>string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or</li>
<li>string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or</li>
<li>None, TPOT will use the default TPOTRegressor configuration.</li>
</ul>
See the <a href="../using/#built-in-tpot-configurations">built-in configurations</a> section for the list of configurations included with TPOT, and the <a href="../using/#customizing-tpots-operators-and-parameters">custom configuration</a> section for more information and examples of how to create your own TPOT configurations.
</blockquote>

<strong>template</strong>: string (default=None)
<blockquote>
Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT.
<br /><br />

So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer or Regressor) or a specific operator (e.g. <code>SelectPercentile</code>) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of <a href="https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17"><code>SelectorMixin</code></a>, <a href="https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html"><code>TransformerMixin</code></a> or <a href="https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html"><code>RegressorMixin</code></a> in scikit-learn) to that step. Steps in the template are delimited by "-", e.g. "SelectPercentile-Transformer-Regressor". By default, the value of template is None and TPOT generates tree-based pipelines randomly.
<br /><br />
See the <a href="../using/#template-option-in-tpot">template option in tpot</a> section for more details.
</blockquote>

<strong>warm_start</strong>: boolean, optional (default=False)
<blockquote>
Flag indicating whether the TPOT instance will reuse the population from previous calls to <em>fit()</em>.
<br /><br />
Setting <em>warm_start</em>=True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off.
</blockquote>

<strong>memory</strong>: a joblib.Memory object or string, optional (default=None)
<blockquote>
If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. More details about memory caching in <a href="http://scikit-learn.org/stable/modules/pipeline.html#caching-transformers-avoid-repeated-computation">scikit-learn documentation</a>
<br /><br />
Possible inputs are:
<ul>
<li>String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or</li>
<li>Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or</li>
<li>Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or</li>
<li>None, TPOT does not use memory caching.</li>
</ul>
</blockquote>

<strong>use_dask</strong>: boolean, optional (default: False)
<blockquote>
Whether to use Dask-ML's pipeline optimizations. This avoids re-fitting
the same estimator on the same split of data multiple times. It
will also provide more detailed diagnostics when using Dask's
distributed scheduler.
<br /><br />
See <a href="https://dask-ml.readthedocs.io/en/latest/hyper-parameter-search.html#avoid-repeated-work">avoid repeated work</a> for more details.
</blockquote>

<strong>periodic_checkpoint_folder</strong>: path string, optional (default: None)
<blockquote>
If supplied, a folder in which TPOT will periodically save the pipelines on the Pareto front so far while optimizing.<br /><br />
Currently once per generation but not more often than once per 30 seconds.<br /><br />
Useful in multiple cases:
<ul>
<li>Sudden death before TPOT could save optimized pipeline</li>
<li>Track its progress</li>
<li>Grab pipelines while it's still optimizing</li>
</ul>
</blockquote>

<strong>early_stop</strong>: integer, optional (default: None)
<blockquote>
Number of consecutive generations without improvement in the optimization process that TPOT will tolerate.
<br /><br />
Ends the optimization process if there is no improvement in the given number of generations.
</blockquote>

<strong>verbosity</strong>: integer, optional (default=0)
<blockquote>
How much information TPOT communicates while it's running.
<br /><br />
Possible inputs are:
<ul>
<li>0, TPOT will print nothing,</li>
<li>1, TPOT will print minimal information,</li>
<li>2, TPOT will print more information and provide a progress bar, or</li>
<li>3, TPOT will print everything and provide a progress bar.</li>
</ul>
</blockquote>

<strong>disable_update_check</strong>: boolean, optional (default=False)
<blockquote>
Flag indicating whether the TPOT version checker should be disabled.
<br /><br />
The update checker will tell you when a new version of TPOT has been released.
</blockquote>
</td>
</tr>

<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Attributes:</strong></td>
<td width="80%" style="background:white;">
<strong>fitted_pipeline_</strong>: scikit-learn Pipeline object
<blockquote>
The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset.
</blockquote>

<strong>pareto_front_fitted_pipelines_</strong>: Python dictionary
<blockquote>
Dictionary containing all the pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset.
<br /><br />
The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline.
<br /><br />
Note: <em>pareto_front_fitted_pipelines_</em> is only available when <em>verbosity</em>=3.
</blockquote>

<strong>evaluated_individuals_</strong>: Python dictionary
<blockquote>
Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline).
<br /><br />
This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated.
</blockquote>
</td>
</tr>
</table>

<p><strong>Example</strong></p>
<div class="highlight"><pre><span></span><code><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="kn">from</span> <span class="nn">tpot</span> <span class="kn">import</span> <span class="n">TPOTRegressor</span>
<a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a><span class="kn">from</span> <span class="nn">sklearn.datasets</span> <span class="kn">import</span> <span class="n">load_boston</span>
<a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a><span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">train_test_split</span>
<a id="__codelineno-6-4" name="__codelineno-6-4" href="#__codelineno-6-4"></a>
<a id="__codelineno-6-5" name="__codelineno-6-5" href="#__codelineno-6-5"></a><span class="n">digits</span> <span class="o">=</span> <span class="n">load_boston</span><span class="p">()</span>
<a id="__codelineno-6-6" name="__codelineno-6-6" href="#__codelineno-6-6"></a><span class="n">X_train</span><span class="p">,</span> <span class="n">X_test</span><span class="p">,</span> <span class="n">y_train</span><span class="p">,</span> <span class="n">y_test</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span><span class="n">digits</span><span class="o">.</span><span class="n">data</span><span class="p">,</span> <span class="n">digits</span><span class="o">.</span><span class="n">target</span><span class="p">,</span>
<a id="__codelineno-6-7" name="__codelineno-6-7" href="#__codelineno-6-7"></a>                                                    <span class="n">train_size</span><span class="o">=</span><span class="mf">0.75</span><span class="p">,</span> <span class="n">test_size</span><span class="o">=</span><span class="mf">0.25</span><span class="p">)</span>
<a id="__codelineno-6-8" name="__codelineno-6-8" href="#__codelineno-6-8"></a>
<a id="__codelineno-6-9" name="__codelineno-6-9" href="#__codelineno-6-9"></a><span class="n">tpot</span> <span class="o">=</span> <span class="n">TPOTRegressor</span><span class="p">(</span><span class="n">generations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">population_size</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span> <span class="n">verbosity</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<a id="__codelineno-6-10" name="__codelineno-6-10" href="#__codelineno-6-10"></a><span class="n">tpot</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X_train</span><span class="p">,</span> <span class="n">y_train</span><span class="p">)</span>
<a id="__codelineno-6-11" name="__codelineno-6-11" href="#__codelineno-6-11"></a><span class="nb">print</span><span class="p">(</span><span class="n">tpot</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">X_test</span><span class="p">,</span> <span class="n">y_test</span><span class="p">))</span>
<a id="__codelineno-6-12" name="__codelineno-6-12" href="#__codelineno-6-12"></a><span class="n">tpot</span><span class="o">.</span><span class="n">export</span><span class="p">(</span><span class="s1">&#39;tpot_boston_pipeline.py&#39;</span><span class="p">)</span>
</code></pre></div>
<p><strong>Functions</strong></p>
<table width="100%">
<tr>
<td width="25%"><a href="#tpotregressor-fit">fit</a>(features, target[, sample_weight, groups])</td>
<td>Run the TPOT optimization process on the given training data.</td>
</tr>

<tr>
<td><a href="#tpotregressor-predict">predict</a>(features)</td>
<td>Use the optimized pipeline to predict the target values for a feature set.</td>
</tr>

<tr>
<td><a href="#tpotregressor-score">score</a>(testing_features, testing_target)</td>
<td>Returns the optimized pipeline's score on the given testing data using the user-specified scoring function.</td>
</tr>

<tr>
<td><a href="#tpotregressor-export">export</a>(output_file_name)</td>
<td>Export the optimized pipeline as Python code.</td>
</tr>
</table>

<p><a name="tpotregressor-fit"></a>
<div class="highlight"><pre><span></span><code><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a><span class="n">fit</span><span class="p">(</span><span class="n">features</span><span class="p">,</span> <span class="n">target</span><span class="p">,</span> <span class="n">sample_weight</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">groups</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
</code></pre></div></p>
<div style="padding-left:5%" width="100%">
Run the TPOT optimization process on the given training data.
<br /><br />
Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. This pipeline optimization procedure uses internal k-fold cross-validation to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples.
<br /><br />
<table width="100%">
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Parameters:</strong></td>
<td width="80%" style="background:white;">
<strong>features</strong>: array-like {n_samples, n_features}
<blockquote>
Feature matrix
<br /><br />
TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values.
As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed)
using <a href="http://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html">median value imputation</a>.
<br /><br />
If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT.
</blockquote>

<strong>target</strong>: array-like {n_samples}
<blockquote>
List of target labels for prediction
</blockquote>

<strong>sample_weight</strong>: array-like {n_samples}, optional
<blockquote>
Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect TPOT's scoring functions, which determine preferences between pipelines.
</blockquote>

<strong>groups</strong>: array-like, with shape {n_samples, }, optional
<blockquote>
Group labels for the samples used when performing cross-validation.
<br /><br />
This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as <a href="http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GroupKFold.html">sklearn.model_selection.GroupKFold</a>.
</blockquote>
</td>
</tr>
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Returns:</strong></td>
<td width="80%" style="background:white;">
<strong>self</strong>: object
<blockquote>
Returns a copy of the fitted TPOT object
</blockquote>
</td>
</tr>
</table>
</div>

<p><a name="tpotregressor-predict"></a>
<div class="highlight"><pre><span></span><code><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a><span class="n">predict</span><span class="p">(</span><span class="n">features</span><span class="p">)</span>
</code></pre></div></p>
<div style="padding-left:5%" width="100%">
Use the optimized pipeline to predict the target values for a feature set.
<br /><br />
<table width="100%">
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Parameters:</strong></td>
<td width="80%" style="background:white;">
<strong>features</strong>: array-like {n_samples, n_features}
<blockquote>
Feature matrix
</blockquote>
</td>
</tr>
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Returns:</strong></td>
<td width="80%" style="background:white;">
<strong>predictions</strong>: array-like {n_samples}
<blockquote>
Predicted target values for the samples in the feature matrix
</blockquote>
</td>
</tr>
</table>
</div>

<p><a name="tpotregressor-score"></a>
<div class="highlight"><pre><span></span><code><a id="__codelineno-9-1" name="__codelineno-9-1" href="#__codelineno-9-1"></a><span class="n">score</span><span class="p">(</span><span class="n">testing_features</span><span class="p">,</span> <span class="n">testing_target</span><span class="p">)</span>
</code></pre></div></p>
<div style="padding-left:5%" width="100%">
Returns the optimized pipeline's score on the given testing data using the user-specified scoring function.
<br /><br />
The default scoring function for TPOTRegressor is 'neg_mean_squared_error'.
<br /><br />
<table width="100%">
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Parameters:</strong></td>
<td width="80%" style="background:white;">
<strong>testing_features</strong>: array-like {n_samples, n_features}
<blockquote>
Feature matrix of the testing set
</blockquote>

<strong>testing_target</strong>: array-like {n_samples}
<blockquote>
List of target labels for prediction in the testing set
</blockquote>
</td>
</tr>
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Returns:</strong></td>
<td width="80%" style="background:white;">
<strong>accuracy_score</strong>: float
<blockquote>
The estimated test set score according to the user-specified scoring function.
</blockquote>
</td>
</tr>
</table>
</div>

<p><a name="tpotregressor-export"></a>
<div class="highlight"><pre><span></span><code><a id="__codelineno-10-1" name="__codelineno-10-1" href="#__codelineno-10-1"></a><span class="n">export</span><span class="p">(</span><span class="n">output_file_name</span><span class="p">)</span>
</code></pre></div></p>
<div style="padding-left:5%" width="100%">
Export the optimized pipeline as Python code.
<br /><br />
See the <a href="../using/#tpot-with-code">usage documentation</a> for example usage of the export function.
<br /><br />
<table width="100%">
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Parameters:</strong></td>
<td width="80%" style="background:white;">
<strong>output_file_name</strong>: string
<blockquote>
String containing the path and file name of the desired output file
</blockquote>
<strong>data_file_path</strong>: string
<blockquote>
By default, the path of the input dataset is 'PATH/TO/DATA/FILE'. If data_file_path is another string, the path will be replaced.
</blockquote>
</td>
</tr>
<tr>
<td width="20%" style="vertical-align:top; background:#F5F5F5;"><strong>Returns:</strong></td>
<td width="80%" style="background:white;">
<strong>exported_code_string</strong>: string
<blockquote>
The whole pipeline text is returned as a string if output_file_name is not specified.
</blockquote>
</td>
</tr>
</table>
</div>





                
              </article>
            </div>
          
          
        </div>
        
      </main>
      
        <footer class="md-footer">
  
  <div class="md-footer-meta md-typeset">
    <div class="md-footer-meta__inner md-grid">
      <div class="md-copyright">
  
    <div class="md-copyright__highlight">
      Developed by <a href="http://www.randalolson.com">Randal S. Olson</a> and others at the University of Pennsylvania
    </div>
  
  
    Made with
    <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
      Material for MkDocs
    </a>
  
</div>
      
    </div>
  </div>
</footer>
      
    </div>
    <div class="md-dialog" data-md-component="dialog">
      <div class="md-dialog__inner md-typeset"></div>
    </div>
    
    <script id="__config" type="application/json">{"base": "..", "features": ["toc.integrate"], "search": "../assets/javascripts/workers/search.208ed371.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
    
    
      <script src="../assets/javascripts/bundle.19047be9.min.js"></script>
      
    
  </body>
</html>